Two-Sample MR

Updated 22.05.2024

working_dir <- "~/GEMMA/reviewed/MR_in"

exp_filename <- "iPSYCH_PGC_ASD_Nov2017__mr_in.tsv.gz"
out_filename <- "EA4_additive_excl_23andMe__mr_in.tsv.gz"

exp_name <- "ASD"
out_name <- "EA"


p_lim <- 0.05
# 5e-8 1e-5

r2_lim <- 0.2
library(TwoSampleMR)
## TwoSampleMR version 0.5.10 
## [>] New: Option to use non-European LD reference panels for clumping etc
## [>] Some studies temporarily quarantined to verify effect allele
## [>] See news(package='TwoSampleMR') and https://gwas.mrcieu.ac.uk for further details
## 
## Warning:
## You are running an old version of the TwoSampleMR package.
## This version:   0.5.10
## Latest version: 0.6.2
## Please consider updating using remotes::install_github('MRCIEU/TwoSampleMR')
library(data.table)
## Warning: package 'data.table' was built under R version 4.3.3
setwd(working_dir)

Data preparation

Loading the exposure data and selecting significant SNPs

setwd(working_dir)
exp_raw_full <- fread(file =exp_filename)
exp_raw <- subset(exp_raw_full,exp_raw_full$pval<p_lim)

exp_raw <- as.data.frame(exp_raw)
exp_raw$phen <- rep(exp_name, nrow(exp_raw))

exp_dat <- format_data( exp_raw,
    type = "exposure",
    snp_col = "SNP",
    beta_col = "beta",
    se_col = "se",
    effect_allele_col = "alt",
    other_allele_col = "ref",
    pval_col = "pval",
    eaf_col = "freq",
    phenotype_col = "phen",
    samplesize_col = "n"
)

Clumping the exposure variables

library(ieugwasr)
## 
## Warning:
## You are running an old version of the ieugwasr package.
## This version:   0.1.8
## Latest version: 1.0.0
## Please consider updating using remotes::install_github('MRCIEU/ieugwasr')
## OpenGWAS updates:
##   Date: 2024-05-17
##   [>] OpenGWAS is growing!
##   [>] Please take 2 minutes to give us feedback -
##   [>] It will help directly shape our emerging roadmap
##   [>] https://forms.office.com/e/eSr7EFAfCG
## 
## Attaching package: 'ieugwasr'
## The following object is masked from 'package:TwoSampleMR':
## 
##     ld_matrix
clumped_exp <- clump_data(exp_dat,clump_r2=r2_lim,pop="EAS")
## Please look at vignettes for options on running this locally if you need to run many instances of this command.
## Clumping mnKGEY, 523665 variants, using EAS population reference
## Removing 490766 of 523665 variants due to LD with other variants or absence from LD reference panel

!!! Clumping 6hkEUL, 351 variants, using EAS population reference Removing 346 of 351 variants due to LD with other variants or absence from LD reference panel

Outcome-data

setwd(working_dir)
out_raw_full <- fread(out_filename)

out_raw <- subset(out_raw_full,out_raw_full$pval<p_lim)
out_raw <- as.data.frame(out_raw)
out_raw$phen <- rep(out_name, nrow(out_raw))

out_dat <- format_data( out_raw,
    type = "outcome",
    snp_col = "SNP",
    beta_col = "beta",
    se_col = "se",
    effect_allele_col = "alt",
    other_allele_col = "ref",
    pval_col = "pval",
    eaf_col = "freq",
    phenotype_col = "phen",
    samplesize_col = "n"
    
)

Harmonizing data

harmonized_data <- harmonise_data(clumped_exp,out_dat,action=1)
## Harmonising ASD (mnKGEY) and EA (f6Isxc)
harmonized_data

MR analysis

res <- mr(harmonized_data)
## Analysing 'mnKGEY' on 'f6Isxc'
res

Sensitivity analysis

Heterogenity

mr_pleiotropy_res <- mr_pleiotropy_test(harmonized_data)
mr_pleiotropy_res

Single SNP and leave-one-out (not performed due to the large amount of SNPs)

Single SNP MR

res_single <- mr_singlesnp(harmonized_data)
res_single

leave-one-out MR

res_loo <- mr_leaveoneout(harmonized_data)
res_loo

Visualization

Scatter plot

res <- mr(harmonized_data)
## Analysing 'mnKGEY' on 'f6Isxc'
p1 <- mr_scatter_plot(res, harmonized_data)
p1[[1]]

LOO visual

res_loo <- mr_leaveoneout(harmonized_data)
p3 <- mr_leaveoneout_plot(res_loo)
p3[[1]]
## Warning: Using linewidth for a discrete variable is not advised.
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_errorbarh()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Funnel plot (single)

res_single <- mr_singlesnp(harmonized_data)
p4 <- mr_funnel_plot(res_single)
p4[[1]]

MR Steiger directionality test

harmonized_data$"r.outcome" <- get_r_from_lor(
  harmonized_data$"beta.outcome",
  harmonized_data$"eaf.outcome",
  45383,
  132032,
  0.26,
  model = "logit",
  correction = FALSE
)

out <- directionality_test(harmonized_data)
## r.exposure and/or r.outcome not present.
## Calculating approximate SNP-exposure and/or SNP-outcome correlations, assuming all are quantitative traits. Please pre-calculate r.exposure and/or r.outcome using get_r_from_lor() for any binary traits
out

Identifying the lead SNPs

sc_plot <- mr_scatter_plot(res, harmonized_data)
sc_plot[[1]]

# Smallest outcome pval
lead_out_pval <- which(harmonized_data$pval.outcome == min(harmonized_data$pval.outcome)  )
print("Lead outcome Pval:")
## [1] "Lead outcome Pval:"
harmonized_data$SNP[[lead_out_pval]]
## [1] "rs2388334"
harmonized_data$pval.outcome[[lead_out_pval]]
## [1] 1.927e-62
harmonized_data[lead_out_pval, ]
# Smallest exposure pval
lead_exp_pval <- which(harmonized_data$pval.exposure == min(harmonized_data$pval.exposure)  )
print("Lead exposure Pval:")
## [1] "Lead exposure Pval:"
harmonized_data$SNP[[lead_exp_pval]]
## [1] "rs73128965"
harmonized_data$pval.exposure[[lead_exp_pval]]
## [1] 1.667e-08
harmonized_data[lead_exp_pval, ]
# Smallest pval for single SNP analysis
print("Lead single SNP Pval:")
## [1] "Lead single SNP Pval:"
n_max <- nrow(res_single) -2
lead_single_ind <- which(res_single$p == min(res_single$p[1:n_max])  )
res_single$SNP[[lead_single_ind]]
## [1] "rs2388334"
res_single$p[[lead_single_ind]]
## [1] 1.769462e-62
print(res_single[lead_single_ind, ])
##      exposure outcome id.exposure id.outcome samplesize       SNP         b
## 2262      ASD      EA      mnKGEY     f6Isxc     765000 rs2388334 0.4262871
##              se            p
## 2262 0.02555359 1.769462e-62